import re
from urllib import request

# Scrape a 51job search-results page: print the total number of matching
# jobs reported by the page, then the title of every listed position.

# Browser-like User-Agent sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
}

# Search-results URL; the path segment carries the keyword "python".
url = "https://search.51job.com/list/180200%252C080200%252C020000,000000,0000,00,9,99,python,2,1.html?lang=c&stype=&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&providesalary=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="

# Patterns are compiled once at import time instead of on every use.
# re.S lets "." span newlines, because the markup is spread over several lines.
_RESULT_COUNT_RE = re.compile(r'<div class="rt">(.*?)</div>', re.S)
_FIRST_NUMBER_RE = re.compile(r'\d+')
_JOB_ENTRY_RE = re.compile(r'<div class="el">(.*?)</div>', re.S)
_JOB_TITLE_RE = re.compile(r'onmousedown="">(.*?)</a>', re.S)


def fetch_html(page_url, request_headers, timeout=30):
    """Download *page_url* and return its body decoded as GBK text.

    Args:
        page_url: Address of the page to download.
        request_headers: Mapping of HTTP headers sent with the request.
        timeout: Seconds to wait on the network before giving up.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails or times out.
    """
    # Build the request object.
    req = request.Request(page_url, headers=request_headers, unverifiable=False)
    # NOTE: no custom SSL context is passed, so urllib's default certificate
    # handling applies (the original comment claimed verification was skipped).
    # The context manager closes the connection even if reading fails.
    with request.urlopen(req, timeout=timeout) as response:
        raw = response.read()
    # The page is decoded as GBK; a stray undecodable byte is replaced rather
    # than aborting the whole run.
    return raw.decode('gbk', errors='replace')


def parse_job_count(html):
    """Return the first integer found inside the first ``<div class="rt">``.

    Args:
        html: Decoded page markup.

    Returns:
        The job count as an ``int``.

    Raises:
        ValueError: If the summary ``<div>`` is missing or contains no digits.
    """
    summary = _RESULT_COUNT_RE.search(html)
    if summary is None:
        raise ValueError('no <div class="rt"> summary block found in the page')
    # Keep only the digits out of the surrounding text.
    digits = _FIRST_NUMBER_RE.search(summary.group(1))
    if digits is None:
        raise ValueError('the <div class="rt"> summary block contains no number')
    return int(digits.group())


def parse_job_names(html):
    """Return the stripped job titles from the ``<div class="el">`` entries.

    The first ``<div class="el">`` match is skipped, as the original script
    always discarded it. Entries without a title link are skipped rather
    than aborting the whole listing.

    Args:
        html: Decoded page markup.

    Returns:
        A list of job-title strings, in page order.
    """
    names = []
    # Slicing (unlike pop(0)) is safe when the page has no entries at all.
    for entry in _JOB_ENTRY_RE.findall(html)[1:]:
        title = _JOB_TITLE_RE.search(entry)
        if title is None:
            continue
        names.append(title.group(1).strip())
    return names


def main():
    """Fetch the search page and print the job count and every job title."""
    html = fetch_html(url, headers)
    print(parse_job_count(html))
    for name in parse_job_names(html):
        print("岗位名称:%s" % name)


if __name__ == "__main__":
    main()





